{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第1节：数据的排序\n",
    "所有的方法均可以适用于Serise和DataFrame\n",
    "\n",
    "以下每个例子都给出【Serise和DataFrame】2种方案\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   D   E   C\n",
       "c   0   1   2   3   4\n",
       "a   5   6   7   8   9\n",
       "b  10  11  12  13  14\n",
       "d  15  16  17  18  19"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 根据索引进行排序\n",
    "# .sort_index(axis=0, ascending=True)\n",
    "# axis指定轴，ascending指定升序或者降序【默认：0轴，升序】\n",
    "\n",
    "#-------------------------------------------------#\n",
    "# DataFrame例子\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "data = np.arange(20).reshape(4, 5)\n",
    "index = ['c','a','b','d']\n",
    "columns = ['A','B','D','E','C']\n",
    "a = pd.DataFrame(data,index,columns)\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   D   E   C\n",
       "a   5   6   7   8   9\n",
       "b  10  11  12  13  14\n",
       "c   0   1   2   3   4\n",
       "d  15  16  17  18  19"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.sort_index() #【默认的】对索引进行了升序排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>E</th>\n",
       "      <th>D</th>\n",
       "      <th>C</th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>13</td>\n",
       "      <td>12</td>\n",
       "      <td>14</td>\n",
       "      <td>11</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>18</td>\n",
       "      <td>17</td>\n",
       "      <td>19</td>\n",
       "      <td>16</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    E   D   C   B   A\n",
       "c   3   2   4   1   0\n",
       "a   8   7   9   6   5\n",
       "b  13  12  14  11  10\n",
       "d  18  17  19  16  15"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.sort_index(axis=1, ascending=False) #设置了参数，对【1轴、降序】排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    3\n",
       "b    1\n",
       "c    2\n",
       "d   -1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series的sort_index()方法\n",
    "b = pd.Series([2,3,1,-1],['c','a','b','d'])\n",
    "\n",
    "b.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    3\n",
       "c    2\n",
       "b    1\n",
       "d   -1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .sort_values()方法在指定轴上根据数值进行排序，【默认：升序、0轴】\n",
    "\n",
    "# Series.sort_values(axis=0, ascending=True)\n",
    "\n",
    "b = pd.Series([2,3,1,-1],['c','a','b','d'])\n",
    "b.sort_values(axis = 0, ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   D   E   C\n",
       "c   0   1   2   3   4\n",
       "a   5   6   7   8   9\n",
       "b  10  11  12  13  14\n",
       "d  15  16  17  18  19"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# DataFrame.sort_values(by, axis=0, ascending=True)【默认：升序、0轴】\n",
    "# by : axis轴上的某个索引或索引列表\n",
    "data = np.arange(20).reshape(4, 5)\n",
    "index = ['c','a','b','d']\n",
    "columns = ['A','B','D','E','C']\n",
    "a = pd.DataFrame(data,index,columns)\n",
    "\n",
    "a.sort_values(['A','E'], axis=0, ascending=True) #注意'A'和'E'对应的数字"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>18.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      0     1     2     3   4\n",
       "0   0.0   2.0   4.0   6.0 NaN\n",
       "1   9.0  11.0  13.0  15.0 NaN\n",
       "2  18.0  20.0  22.0  24.0 NaN\n",
       "3   NaN   NaN   NaN   NaN NaN"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NaN统一放到排序末尾\n",
    "a = pd.DataFrame(np.arange(12).reshape(3,4))\n",
    "b = pd.DataFrame(np.arange(20).reshape(4,5))\n",
    "c=a+b\n",
    "c.sort_values(2, axis=1, ascending=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "# 第2节：数据的统计分析\n",
    "![avatar](./image/day4-img4.PNG)\n",
    "![avatar](./image/day4-img5.PNG)\n",
    "![avatar](./image/day4-img6.PNG)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>7.500000</td>\n",
       "      <td>8.500000</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>10.500000</td>\n",
       "      <td>11.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>6.454972</td>\n",
       "      <td>6.454972</td>\n",
       "      <td>6.454972</td>\n",
       "      <td>6.454972</td>\n",
       "      <td>6.454972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>3.750000</td>\n",
       "      <td>4.750000</td>\n",
       "      <td>5.750000</td>\n",
       "      <td>6.750000</td>\n",
       "      <td>7.750000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>7.500000</td>\n",
       "      <td>8.500000</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>10.500000</td>\n",
       "      <td>11.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>11.250000</td>\n",
       "      <td>12.250000</td>\n",
       "      <td>13.250000</td>\n",
       "      <td>14.250000</td>\n",
       "      <td>15.250000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>15.000000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>17.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>19.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               0          1          2          3          4\n",
       "count   4.000000   4.000000   4.000000   4.000000   4.000000\n",
       "mean    7.500000   8.500000   9.500000  10.500000  11.500000\n",
       "std     6.454972   6.454972   6.454972   6.454972   6.454972\n",
       "min     0.000000   1.000000   2.000000   3.000000   4.000000\n",
       "25%     3.750000   4.750000   5.750000   6.750000   7.750000\n",
       "50%     7.500000   8.500000   9.500000  10.500000  11.500000\n",
       "75%    11.250000  12.250000  13.250000  14.250000  15.250000\n",
       "max    15.000000  16.000000  17.000000  18.000000  19.000000"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 最强大的.describe()\n",
    "b = pd.DataFrame(np.arange(20).reshape(4,5))\n",
    "b.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    15.0\n",
       "1    16.0\n",
       "2    17.0\n",
       "3    18.0\n",
       "4    19.0\n",
       "Name: max, dtype: float64"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .ix已经被淘汰\n",
    "# 快速获取max值\n",
    "b.describe().loc['max']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "# 第3节：数据的累计分析\n",
    "![avatar](./image/day4-img7.PNG)\n",
    "![avatar](./image/day4-img8.PNG)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1   2   3   4\n",
       "c   0   1   2   3   4\n",
       "a   5   6   7   8   9\n",
       "d  10  11  12  13  14\n",
       "b  15  16  17  18  19"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = pd.DataFrame(np.arange(20).reshape(4,5),['c','a','d','b'])\n",
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>11</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>15</td>\n",
       "      <td>18</td>\n",
       "      <td>21</td>\n",
       "      <td>24</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>30</td>\n",
       "      <td>34</td>\n",
       "      <td>38</td>\n",
       "      <td>42</td>\n",
       "      <td>46</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1   2   3   4\n",
       "c   0   1   2   3   4\n",
       "a   5   7   9  11  13\n",
       "d  15  18  21  24  27\n",
       "b  30  34  38  42  46"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.cumsum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>15.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>25.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>33.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      0     1     2     3     4\n",
       "c   NaN   NaN   NaN   NaN   NaN\n",
       "a   5.0   7.0   9.0  11.0  13.0\n",
       "d  15.0  17.0  19.0  21.0  23.0\n",
       "b  25.0  27.0  29.0  31.0  33.0"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 相邻的定义，【以该数为终点，向前找数】\n",
    "b.rolling(2).sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "# 第4节：数据的相关分析\n",
    "![avatar](./image/day4-img9.PNG)\n",
    "![avatar](./image/day4-img10.PNG)\n",
    "![avatar](./image/day4-img11.PNG)\n",
    "![avatar](./image/day4-img12.PNG)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "      <td>41.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           0          1          2          3          4\n",
       "0  41.666667  41.666667  41.666667  41.666667  41.666667\n",
       "1  41.666667  41.666667  41.666667  41.666667  41.666667\n",
       "2  41.666667  41.666667  41.666667  41.666667  41.666667\n",
       "3  41.666667  41.666667  41.666667  41.666667  41.666667\n",
       "4  41.666667  41.666667  41.666667  41.666667  41.666667"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = pd.DataFrame(np.arange(20).reshape(4,5),['c','a','d','b'])\n",
    "\n",
    "b.cov()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>-0.666667</td>\n",
       "      <td>-0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>-0.666667</td>\n",
       "      <td>-0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.666667</td>\n",
       "      <td>-0.666667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.333333</td>\n",
       "      <td>-0.333333</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4\n",
       "0  1.000000  1.000000  0.333333 -0.666667 -0.333333\n",
       "1  1.000000  1.000000  0.333333 -0.666667 -0.333333\n",
       "2  0.333333  0.333333  1.000000  0.000000  0.333333\n",
       "3 -0.666667 -0.666667  0.000000  1.000000  0.666667\n",
       "4 -0.333333 -0.333333  0.333333  0.666667  1.000000"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# DataFrame.corr(method='pearson')\n",
    "# method：可选值为{‘pearson’, ‘kendall’, ‘spearman’}\n",
    "\n",
    "b = pd.DataFrame(np.random.randn(20).reshape(4,5),['c','a','d','b'])\n",
    "\n",
    "b.corr(method = 'kendall')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.1 64-bit",
   "language": "python",
   "name": "python38164bitfe2ebe8b9a4f493e93d81a74c550882b"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
